# Load in rooted phylogenetic tree!
load("data/03_Phylogenetic_Tree/phytree_preprocessed_physeq.RData")
unrooted_physeq## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1736 taxa and 11 samples ]
## sample_data() Sample Data: [ 11 samples by 23 sample variables ]
## tax_table() Taxonomy Table: [ 1736 taxa by 9 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1736 tips and 1734 internal nodes ]
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1736 taxa and 11 samples ]
## sample_data() Sample Data: [ 11 samples by 23 sample variables ]
## tax_table() Taxonomy Table: [ 1736 taxa by 9 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1736 tips and 1735 internal nodes ]
# Sequence depth will inform us on how we want to normalize our data
# Calculate the total number of reads per sample
raw_total_seqs_df <-
unrooted_physeq %>%
# Calculate the sample read sums
sample_sums() %>%
data.frame()
# Name the column
colnames(raw_total_seqs_df)[1] <- "TotalSeqs"
head(raw_total_seqs_df)## TotalSeqs
## 568_4 24503
## 568_5 17303
## 581_5 15963
## 611_5 13893
## E05_5 45378
## E1_4 43941
### scale_reads function and matround function
####################################################################################
# Function to scale reads: http://deneflab.github.io/MicrobeMiseq/
# Scales reads by
# 1) taking proportions
# 2) multiplying by a given library size of n
# 3) rounding
# Default for n is the minimum sample size in your library
# Default for round is floor
matround <- function(x){trunc(x+0.5)}
scale_reads <- function(physeq, n = min(sample_sums(physeq)), round = "round") {
# transform counts to n
physeq.scale <- transform_sample_counts(physeq, function(x) {(n * x/sum(x))})
# Pick the rounding functions
if (round == "floor"){
otu_table(physeq.scale) <- floor(otu_table(physeq.scale))
} else if (round == "round"){
otu_table(physeq.scale) <- round(otu_table(physeq.scale))
} else if (round == "matround"){
otu_table(physeq.scale) <- matround(otu_table(physeq.scale))
}
# Prune taxa and return new phyloseq object
physeq.scale <- prune_taxa(taxa_sums(physeq.scale) > 0, physeq.scale)
return(physeq.scale)
}Rescale all reads so they all represent the count of the lowest number of sequence reads. We will expect each sample to have # of reads around 2200
This is where one might decide to use rarefaction to normalize the data.
## [1] 7142
# Scale reads by the above function
scaled_rooted_physeq <-
midroot_physeq %>%
scale_reads(round = "matround")
# Calculate read depth
## Look at total number of sequences in each sample and compare to what we had before
scaled_total_seqs_df <-
scaled_rooted_physeq %>%
sample_sums() %>%
data.frame()
head(scaled_total_seqs_df)## .
## 568_4 7143
## 568_5 7134
## 581_5 7140
## 611_5 7159
## E05_5 7149
## E1_4 7137
# Change first column name to be "TotalSeqs"
colnames(scaled_total_seqs_df)[1] <- "TotalSeqs"
# Inspect
head(scaled_total_seqs_df)## TotalSeqs
## 568_4 7143
## 568_5 7134
## 581_5 7140
## 611_5 7159
## E05_5 7149
## E1_4 7137
# Check range of data
min_seqs <-
min(scaled_total_seqs_df)
max_seqs <-
max(scaled_total_seqs_df)
# Range of seqs
max_seqs - min_seqs## [1] 25
# Plot histogram
scaled_total_seqs_df %>%
ggplot(aes(x = TotalSeqs)) +
geom_histogram(bins = 50) +
scale_x_continuous(limits = c(0, 10000)) +
labs(title = "Scaled Sequencing Depth at 7131") +
theme_bw()## Warning: Removed 2 rows containing missing values (`geom_bar()`).
## TotalSeqs
## 568_4 7143
## 568_5 7134
## 581_5 7140
## 611_5 7159
## E05_5 7149
## E1_4 7137
Exploratory analyses from the Paily & Shankar (2016) paper, which is using unconstrained ordination methods like PCoA.
# Calculate sorenson dissimularity: Abundance-unweighted of shared taxa
scaled_soren_pcoa <-
ordinate(
physeq = scaled_rooted_physeq,
method = "PCoA",
distance = "bray", binary = TRUE)
#str(scaled_soren_pcoa)
# Plot the ordination
soren_gut_section_pcoa <- plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_soren_pcoa,
color = "gut_section",
shape = "gut_section",
title = "Sorensen PCoA") +
geom_point(size=5, alpha = 0.5, aes(color = gut_section)) +
scale_shape_manual(values = c(15,16)) +
scale_color_manual(values = gut_section_colors) +
theme_bw()
# Show the plot
soren_gut_section_pcoaHere, we are evaluating the shared taxa by presence/absence (abundance-unweighted) in the Sorensen metric.
Note that:
This means we explain 45% of the variation in the data in these two axes.
# Calculate the BC distance
scaled_BC_pcoa <-
ordinate(
physeq = scaled_rooted_physeq,
method = "PCoA",
distance = "bray")
# Plot the PCoA
bray_gut_section_pcoa <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_BC_pcoa,
color = "gut_section",
shape = "gut_section",
title = "Bray-Curtis PCoA") +
geom_point(size=5, alpha = 0.5, aes(color = gut_section)) +
scale_shape_manual(values = c(15,16)) +
scale_color_manual(values = c(gut_section_colors)) +
theme_bw()
bray_gut_section_pcoaHere, we are evaluating the shared taxa and then weighting them by their abundances, which provides more influence for species that are more abundant.
Note that:
This means we explain 44% of the variation in the data in these two axes, which is almost exactly the same as the previous plot with the Sorensen Dissimilarity. Abundance does NOT seem to have an influence!!
# Calculate the BC distance
scaled_wUNI_pcoa <-
ordinate(
physeq = scaled_rooted_physeq,
method = "PCoA",
distance = "wunifrac")
# Plot the PCoA
wUNI_gut_section_pcoa <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_wUNI_pcoa,
color = "gut_section",
shape = "gut_section",
title = "Weighted Unifrac PCoA") +
geom_point(size=5, alpha = 0.5, aes(color = gut_section)) +
scale_shape_manual(values = c(15,16,17,18, 19, 20)) +
scale_color_manual(values = c(gut_section_colors)) +
theme_bw()
wUNI_gut_section_pcoaHere, we are evaluating the shared taxa and then weighting them by their abundances, which provides more influence for species that are more abundant.
Note that:
This means we explain 82% of the variation in the data in these two axes (!!!), which is significantly more than the previous plots with the taxonomic dissimilarity measures. (Almost double.) Here, phylogeny seems to be very important! This means that taxa that are abundant are found in different places in the phylogenetic tree. Therefore, the evoultionary distances (aka the branch lengths) and their abundances seem to have a major influence!!
Let’s plot all three together into one plot to have a concise visualization of the three metrics.
(soren_gut_section_pcoa + theme(legend.position = "none")) +
(bray_gut_section_pcoa + theme(legend.position = "none")) +
(wUNI_gut_section_pcoa + theme(legend.position = "none"))Since we did 3 of the dissimilarity metrics for the PCoA, let’s just plot one example of them for the NMDS plotting. Here, we will use weighted Unifrac
# Calculate the Weighted Unifrac distance
scaled_wUNI_nmds <-
ordinate(
physeq = scaled_rooted_physeq,
method = "NMDS",
distance = "wunifrac")## Run 0 stress 0.01900662
## Run 1 stress 0.01822508
## ... New best solution
## ... Procrustes: rmse 0.06232982 max resid 0.1340842
## Run 2 stress 0.01822507
## ... New best solution
## ... Procrustes: rmse 1.03531e-05 max resid 2.638536e-05
## ... Similar to previous best
## Run 3 stress 0.04554822
## Run 4 stress 0.2091265
## Run 5 stress 0.21255
## Run 6 stress 0.01900669
## Run 7 stress 0.2095517
## Run 8 stress 0.01822508
## ... Procrustes: rmse 6.927923e-06 max resid 1.846096e-05
## ... Similar to previous best
## Run 9 stress 0.04002026
## Run 10 stress 0.01900668
## Run 11 stress 0.01900655
## Run 12 stress 0.04554816
## Run 13 stress 0.2069593
## Run 14 stress 0.02005189
## Run 15 stress 0.2123778
## Run 16 stress 0.03998939
## Run 17 stress 0.01822508
## ... Procrustes: rmse 1.031921e-05 max resid 2.747214e-05
## ... Similar to previous best
## Run 18 stress 0.01900674
## Run 19 stress 0.2569087
## Run 20 stress 0.01900653
## *** Best solution repeated 3 times
# Plot the PCoA
wUNI_gut_section_nmds <-
plot_ordination(
physeq = scaled_rooted_physeq,
ordination = scaled_wUNI_nmds,
color = "gut_section") +
geom_point(size=5, alpha = 0.5, aes(color = gut_section)) +
scale_shape_manual(values = c(15,16)) +
scale_color_manual(values = c(gut_section_colors)) +
theme_bw() + labs(color = "Gut Section")
wUNI_gut_section_nmdsWe can see from above the plot that the stress value is ~0.02, which is very acceptable. And, It seems important to emphasize that the PCoA and the NMDS plot both look pretty similar!
In this case, I would always prefer to report the PCoA results because they are linear and provide a lot more post-hoc analyses to follow up with. In addition, it’s helpful to only have 2 axes of variation and show how much variation is explained.
(wUNI_gut_section_pcoa + theme(legend.position = "none")) +
(wUNI_gut_section_nmds + theme(legend.position = "none"))# Calculate all three of the distance matrices
scaled_sorensen_dist <- phyloseq::distance(scaled_rooted_physeq, method = "bray", binary = TRUE)
scaled_bray_dist <- phyloseq::distance(scaled_rooted_physeq, method = "bray")
scaled_wUnifrac_dist <- phyloseq::distance(scaled_rooted_physeq, method = "wunifrac")
# make a data frame from the sample_data
# All distance matrices will be the same metadata because they
# originate from the same phyloseq object.
metadata <- data.frame(sample_data(scaled_rooted_physeq))
# Adonis test
# In this example we are testing the hypothesis that the five stations
# that were collected have different centroids in the ordination space
# for each of the dissimilarity metrics, we are using a discrete variable
adonis2(scaled_sorensen_dist ~ gut_section, data = metadata)## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_sorensen_dist ~ gut_section, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## gut_section 1 0.69653 0.2419 2.8718 0.002 **
## Residual 9 2.18283 0.7581
## Total 10 2.87936 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_bray_dist ~ gut_section, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## gut_section 1 0.74126 0.26788 3.2931 0.009 **
## Residual 9 2.02588 0.73212
## Total 10 2.76714 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_wUnifrac_dist ~ gut_section, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## gut_section 1 0.29380 0.5474 10.885 0.004 **
## Residual 9 0.24292 0.4526
## Total 10 0.53672 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Note that:
Above, we see that the most variation is explained by the weighted unifrac, which explains ~55% of the variation in the data and also has the highest F-statistic.
# We might also care about other variables
# Here, we will add date and fraction as variables
# multiplicative model ORDER MATTERS!
adonis2(scaled_sorensen_dist ~ gut_section * year, data = metadata)## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_sorensen_dist ~ gut_section * year, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## gut_section 1 0.69653 0.24190 2.8962 0.006 **
## year 1 0.22776 0.07910 0.9470 0.402
## gut_section:year 1 0.27161 0.09433 1.1294 0.235
## Residual 7 1.68346 0.58467
## Total 10 2.87936 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_bray_dist ~ gut_section * year, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## gut_section 1 0.74126 0.26788 3.2415 0.002 **
## year 1 0.20609 0.07448 0.9012 0.519
## gut_section:year 1 0.21904 0.07916 0.9578 0.470
## Residual 7 1.60075 0.57849
## Total 10 2.76714 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_wUnifrac_dist ~ gut_section * year, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## gut_section 1 0.29380 0.54740 9.8159 0.004 **
## year 1 0.00461 0.00859 0.1539 0.956
## gut_section:year 1 0.02879 0.05364 0.9619 0.396
## Residual 7 0.20952 0.39037
## Total 10 0.53672 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = scaled_wUnifrac_dist ~ year * gut_section, data = metadata)
## Df SumOfSqs R2 F Pr(>F)
## year 1 0.00697 0.01299 0.2329 0.879
## gut_section 1 0.29144 0.54300 9.7370 0.003 **
## year:gut_section 1 0.02879 0.05364 0.9619 0.397
## Residual 7 0.20952 0.39037
## Total 10 0.53672 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We can also run tests that include additive (+) or multipliciatve models, which include the interaction term between variables.
The PERMANOVA is sensitive to variance/dispersion in the data. Therefore, we need to run a homogeneity of dispersion test to test for the sensitivity of our PERMANOVA results to variance.
# Homogeneity of Disperson test with beta dispr
# Sorensen
beta_soren_gut_section <- betadisper(scaled_sorensen_dist, metadata$gut_section)
permutest(beta_soren_gut_section)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.008910 0.0089097 2.3074 999 0.183
## Residuals 9 0.034753 0.0038615
# Bray-curtis
beta_bray_gut_section <- betadisper(scaled_bray_dist, metadata$gut_section)
permutest(beta_bray_gut_section)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.000370 0.0003697 0.0611 999 0.807
## Residuals 9 0.054473 0.0060526
# Weighted Unifrac
beta_bray_gut_section <- betadisper(scaled_wUnifrac_dist, metadata$gut_section)
permutest(beta_bray_gut_section)##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 1 0.008195 0.0081948 1.2269 999 0.323
## Residuals 9 0.060112 0.0066791
Above, our variance is not impacted by gut section.
# Set the phylum colors
phylum_colors <- c(
Acidobacteriota = "navy",
Actinobacteriota = "darkslategray2",
Armatimonadota = "deeppink1",
Alphaproteobacteria = "plum2",
Bacteroidota = "gold",
Betaproteobacteria = "plum1",
Bdellovibrionota = "red1",
Chloroflexi="black",
Crenarchaeota = "firebrick",
Cyanobacteria = "limegreen",
Deltaproteobacteria = "grey",
Desulfobacterota="magenta",
Fibrobacterota = "darkgreen",
Bacillota = "#3E9B96",
Gammaproteobacteria = "greenyellow",
"Marinimicrobia (SAR406 clade)" = "yellow",
Myxococcota = "#B5D6AA",
Nitrospirota = "palevioletred1",
Pseudomonadota = "royalblue",
Planctomycetota = "darkorange",
Spirochaetota = "olivedrab",
Thermoplasmatota = "green",
Verrucomicrobiota = "darkorchid1")# Goal is to calculate the phylum relative abundance
# Note: the read depth must be normalized in some way: scaled_reads
phylum_df <-
scaled_rooted_physeq %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Phylum") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out phyla that are less than one percent - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
# fix the order of date
# Stacked bar plot with all Phyla
# Plot Phylum Abundances - make sure to load phylum_colors
phylum_df %>%
# Warning: Its important to have one sample per x value,
# Otherwise, it will take the sum between multiple samples
ggplot(aes(x = names, y = Abundance, fill = Phylum)) +
facet_grid(.~gut_section) +
geom_bar(stat = "identity", color = "black") +
labs(title = "Gut Section Phylum Composition") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))# Plot Phylum Abundances - make sure to load phylum_colors
phylum_df %>%
# Warning: Its important to have one sample per x value,
# Otherwise, it will take the sum between multiple samples
ggplot(aes(x = names, y = Abundance, fill = Phylum)) +
geom_bar(stat = "identity", color = "black") +
labs(title = "Gut Section Phylum Composition") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))# Make each Phyla its own row
#fixed scale
phylum_df %>%
ggplot(aes(x = names, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~gut_section) +
geom_bar(stat = "identity", color = "black") +
labs(title = "Gut Section Phylum Composition") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))# free scale
phylum_df %>%
ggplot(aes(x = names, y = Abundance, fill = Phylum)) +
facet_grid(Phylum~gut_section, scale = "free") +
geom_bar(stat = "identity", color = "black") +
labs(title = "Gut Section Phylum Composition") +
scale_fill_manual(values = phylum_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))# Narrow in on a specific group
# Bacteroidota - y: abundance, x: gut section, dot plot + boxplot
Bacteroidota_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Bacteroidota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Bacteroidota Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Bacteroidota_phylum# Cyanobacteria - y: abundance, x: gut section, dot plot + boxplot
Cyanobacteria_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Cyanobacteria") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Cyanobacteria Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Cyanobacteria_phylum## Warning: Computation failed in `stat_compare_means()`
## Caused by error in `kruskal.test.default()`:
## ! all observations are in the same group
# Desulfobacterota - y: abundance, x: gut section, dot plot + boxplot
Desulfobacterota_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Desulfobacterota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Desulfobacterota Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Desulfobacterota_phylum# Fibrobacterota - y: abundance, x: gut section, dot plot + boxplot
Fibrobacterota_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Fibrobacterota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Fibrobacterota Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Fibrobacterota_phylum## Warning: Computation failed in `stat_compare_means()`
## Caused by error in `kruskal.test.default()`:
## ! all observations are in the same group
# Firmicutes - y: abundance, x: gut section, dot plot + boxplot
Firmicutes_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Firmicutes") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Firmicutes Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Firmicutes_phylum# Proteobacteria - y: abundance, x: gut section, dot plot + boxplot
Proteobacteria_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Proteobacteria") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Proteobacteria Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Proteobacteria_phylum# Spirochaetota- y: abundance, x: gut section, dot plot + boxplot
Spirochaetota_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Spirochaetota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Spirochaetota Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors)
# Verrucomicrobiota - y: abundance, x: gut section, dot plot + boxplot
Verrucomicrobiota_phylum <-
phylum_df %>%
dplyr::filter(Phylum == "Verrucomicrobiota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance, fill = gut_section, color = gut_section)) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Verrucomicrobiota Phylum Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means(method = "kruskal.test")
Verrucomicrobiota_phylumggarrange(Bacteroidota_phylum, Cyanobacteria_phylum, Desulfobacterota_phylum,
Fibrobacterota_phylum, Firmicutes_phylum, Proteobacteria_phylum,
Spirochaetota_phylum, Verrucomicrobiota_phylum)## Warning: Computation failed in `stat_compare_means()`
## Computation failed in `stat_compare_means()`
## Caused by error in `kruskal.test.default()`:
## ! all observations are in the same group
# Set the phylum colors
Genus_colors <- c(
Akkermansia = "navy",
"Christensenellaceae R-7 group" = "darkslategray2",
Alistipes = "deeppink1",
Aureibacter = "plum2",
Endozoicomonas = "gold",
Desulfobotulus= "magenta4",
Fibrobacter = "red1",
Desulfovibrio="black",
"dgA-11 gut group" = "firebrick",
Epulopiscium = "limegreen",
Ferrimonas = "grey",
Odoribacter ="magenta",
Rikenella = "greenyellow",
"Rikenellaceae RC9 gut group" = "yellow",
Ruminococcus = "#B5D6AA",
Romboutsia = "palevioletred1",
Treponema = "royalblue",
Vibrio = "greenyellow",
Other = "darkgray")Genus_df <-
scaled_rooted_physeq %>%
# agglomerate at the phylum level
tax_glom(taxrank = "Genus") %>%
# Transform counts to relative abundance
transform_sample_counts(function (x) {x/sum(x)}) %>%
# Melt to a long format
psmelt() %>%
# Filter out phyla that are less than one percent - get rid of low abundant Phyla
dplyr::filter(Abundance > 0.01)
# Facet by gut section
Genus_df %>%
# Warning: Its important to have one sample per x value,
# Otherwise, it will take the sum between multiple samples
ggplot(aes(x = names, y = Abundance, fill = Genus)) +
facet_grid(.~gut_section) +
geom_bar(stat = "identity", color = "black") +
scale_fill_manual(values = Genus_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))Genus_df %>%
# Warning: Its important to have one sample per x value,
# Otherwise, it will take the sum between multiple samples
ggplot(aes(x = names, y = Abundance, fill = Genus)) +
#facet_grid(.~gut_section) +
geom_bar(stat = "identity", color = "black") +
scale_fill_manual(values = Genus_colors) +
theme_bw() +
theme(axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1))# Bacteroidota
Genus_df %>%
dplyr::filter(Phylum == "Bacteroidota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Genus, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Bacteroidota genus Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) +
stat_compare_means()## Warning: Computation failed in `stat_compare_means()`
## Computation failed in `stat_compare_means()`
## Computation failed in `stat_compare_means()`
## Computation failed in `stat_compare_means()`
## Caused by error:
## ! argument "x" is missing, with no default
# Bacillota
Genus_df %>%
dplyr::filter(Phylum == "Bacillota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Genus, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Bacillota Genus Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) # Pseudomonadota
Genus_df %>%
dplyr::filter(Phylum == "Pseudomonadota") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Genus, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Pseudomonadota Genus Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors)# Akkermansia
Genus_df %>%
dplyr::filter(Genus == "Akkermansia") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Genus, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Akkermansia Genus Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors)# Desulfovibrio
Genus_df %>%
dplyr::filter(Genus == "Desulfovibrio") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Genus, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Desulfovibrio Genus Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) # dg A -11 gut group
Genus_df %>%
dplyr::filter(Genus == "dgA-11 gut group") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Genus, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "dgA- 11 gut group Genus Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) #Fibrobacter
Genus_df %>%
dplyr::filter(Genus == "Fibrobacter") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Genus, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Fibrobacter Genus Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) #Treponema
Genus_df %>%
dplyr::filter(Genus == "Treponema") %>%
# build the plot
ggplot(aes(x = gut_section, y = Abundance,
fill = gut_section, color = gut_section)) +
facet_wrap(.~Genus, scales = "free_y", nrow = 1) +
geom_boxplot(alpha = 0.5, outlier.shape = NA) +
# outliers not plotted here in boxplot
geom_jitter() +
theme_bw() +
labs(title = "Treponema Genus Abundance") +
scale_color_manual(values = gut_section_colors) +
scale_fill_manual(values = gut_section_colors) ## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.3.2 (2023-10-31)
## os macOS Sonoma 14.5
## system x86_64, darwin20
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2024-07-30
## pandoc 3.1.11 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/x86_64/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## abind 1.4-5 2016-07-21 [2] CRAN (R 4.3.0)
## ade4 1.7-22 2023-02-06 [2] CRAN (R 4.3.0)
## ape 5.7-1 2023-03-13 [2] CRAN (R 4.3.0)
## backports 1.4.1 2021-12-13 [2] CRAN (R 4.3.0)
## Biobase 2.62.0 2023-10-24 [2] Bioconductor
## BiocGenerics 0.48.1 2023-11-01 [2] Bioconductor
## biomformat 1.30.0 2023-10-24 [2] Bioconductor
## Biostrings 2.70.2 2024-01-28 [2] Bioconductor 3.18 (R 4.3.2)
## bitops 1.0-7 2021-04-24 [2] CRAN (R 4.3.0)
## broom 1.0.5 2023-06-09 [2] CRAN (R 4.3.0)
## bslib 0.6.1 2023-11-28 [2] CRAN (R 4.3.0)
## cachem 1.0.8 2023-05-01 [2] CRAN (R 4.3.0)
## car 3.1-2 2023-03-30 [1] CRAN (R 4.3.0)
## carData 3.0-5 2022-01-06 [1] CRAN (R 4.3.0)
## cli 3.6.2 2023-12-11 [2] CRAN (R 4.3.0)
## cluster 2.1.6 2023-12-01 [2] CRAN (R 4.3.0)
## codetools 0.2-19 2023-02-01 [2] CRAN (R 4.3.2)
## colorspace 2.1-0 2023-01-23 [2] CRAN (R 4.3.0)
## cowplot 1.1.3 2024-01-22 [1] CRAN (R 4.3.2)
## crayon 1.5.2 2022-09-29 [2] CRAN (R 4.3.0)
## data.table 1.15.0 2024-01-30 [2] CRAN (R 4.3.2)
## devtools * 2.4.5 2022-10-11 [2] CRAN (R 4.3.0)
## digest 0.6.34 2024-01-11 [2] CRAN (R 4.3.0)
## dplyr * 1.1.4 2023-11-17 [2] CRAN (R 4.3.0)
## ellipsis 0.3.2 2021-04-29 [2] CRAN (R 4.3.0)
## evaluate 0.23 2023-11-01 [2] CRAN (R 4.3.0)
## fansi 1.0.6 2023-12-08 [2] CRAN (R 4.3.0)
## farver 2.1.1 2022-07-06 [2] CRAN (R 4.3.0)
## fastmap 1.1.1 2023-02-24 [2] CRAN (R 4.3.0)
## forcats * 1.0.0 2023-01-29 [2] CRAN (R 4.3.0)
## foreach 1.5.2 2022-02-02 [2] CRAN (R 4.3.0)
## fs 1.6.3 2023-07-20 [2] CRAN (R 4.3.0)
## generics 0.1.3 2022-07-05 [2] CRAN (R 4.3.0)
## GenomeInfoDb 1.38.6 2024-02-08 [2] Bioconductor 3.18 (R 4.3.2)
## GenomeInfoDbData 1.2.11 2023-11-13 [2] Bioconductor
## ggplot2 * 3.4.4 2023-10-12 [1] CRAN (R 4.3.0)
## ggpubr * 0.6.0.999 2024-07-30 [1] Github (kassambara/ggpubr@6aeb4f7)
## ggsignif 0.6.4 2022-10-13 [1] CRAN (R 4.3.0)
## glue 1.7.0 2024-01-09 [1] CRAN (R 4.3.0)
## gtable 0.3.4 2023-08-21 [2] CRAN (R 4.3.0)
## highr 0.10 2022-12-22 [2] CRAN (R 4.3.0)
## hms 1.1.3 2023-03-21 [2] CRAN (R 4.3.0)
## htmltools 0.5.7 2023-11-03 [2] CRAN (R 4.3.0)
## htmlwidgets 1.6.4 2023-12-06 [2] CRAN (R 4.3.0)
## httpuv 1.6.14 2024-01-26 [2] CRAN (R 4.3.2)
## igraph 2.0.1.1 2024-01-30 [2] CRAN (R 4.3.2)
## IRanges 2.36.0 2023-10-24 [2] Bioconductor
## iterators 1.0.14 2022-02-05 [2] CRAN (R 4.3.0)
## jquerylib 0.1.4 2021-04-26 [2] CRAN (R 4.3.0)
## jsonlite 1.8.8 2023-12-04 [2] CRAN (R 4.3.0)
## knitr 1.45 2023-10-30 [2] CRAN (R 4.3.0)
## labeling 0.4.3 2023-08-29 [2] CRAN (R 4.3.0)
## later 1.3.2 2023-12-06 [2] CRAN (R 4.3.0)
## lattice * 0.22-5 2023-10-24 [2] CRAN (R 4.3.0)
## lifecycle 1.0.4 2023-11-07 [2] CRAN (R 4.3.0)
## lubridate * 1.9.3 2023-09-27 [2] CRAN (R 4.3.0)
## magrittr 2.0.3 2022-03-30 [2] CRAN (R 4.3.0)
## MASS 7.3-60.0.1 2024-01-13 [2] CRAN (R 4.3.0)
## Matrix 1.6-5 2024-01-11 [2] CRAN (R 4.3.0)
## memoise 2.0.1 2021-11-26 [2] CRAN (R 4.3.0)
## mgcv 1.9-1 2023-12-21 [2] CRAN (R 4.3.0)
## mime 0.12 2021-09-28 [2] CRAN (R 4.3.0)
## miniUI 0.1.1.1 2018-05-18 [2] CRAN (R 4.3.0)
## multtest 2.58.0 2023-10-24 [2] Bioconductor
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.3.0)
## nlme 3.1-164 2023-11-27 [2] CRAN (R 4.3.0)
## pacman 0.5.1 2019-03-11 [1] CRAN (R 4.3.0)
## patchwork * 1.2.0.9000 2024-05-07 [1] Github (thomasp85/patchwork@d943757)
## permute * 0.9-7 2022-01-27 [2] CRAN (R 4.3.0)
## phyloseq * 1.46.0 2023-10-24 [2] Bioconductor
## pillar 1.9.0 2023-03-22 [2] CRAN (R 4.3.0)
## pkgbuild 1.4.3 2023-12-10 [2] CRAN (R 4.3.0)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.3.0)
## pkgload 1.3.4 2024-01-16 [2] CRAN (R 4.3.0)
## plyr 1.8.9 2023-10-02 [1] CRAN (R 4.3.0)
## profvis 0.3.8 2023-05-02 [2] CRAN (R 4.3.0)
## promises 1.2.1 2023-08-10 [2] CRAN (R 4.3.0)
## purrr * 1.0.2 2023-08-10 [2] CRAN (R 4.3.0)
## R6 2.5.1 2021-08-19 [2] CRAN (R 4.3.0)
## Rcpp 1.0.12 2024-01-09 [2] CRAN (R 4.3.0)
## RCurl 1.98-1.14 2024-01-09 [2] CRAN (R 4.3.0)
## readr * 2.1.5 2024-01-10 [2] CRAN (R 4.3.0)
## remotes 2.4.2.1 2023-07-18 [2] CRAN (R 4.3.0)
## reshape2 1.4.4 2020-04-09 [2] CRAN (R 4.3.0)
## rhdf5 2.46.1 2023-11-29 [2] Bioconductor
## rhdf5filters 1.14.1 2023-11-06 [2] Bioconductor
## Rhdf5lib 1.24.2 2024-02-07 [2] Bioconductor 3.18 (R 4.3.2)
## rlang 1.1.3 2024-01-10 [2] CRAN (R 4.3.0)
## rmarkdown 2.25 2023-09-18 [2] CRAN (R 4.3.0)
## rstatix * 0.7.2 2023-02-01 [1] CRAN (R 4.3.0)
## rstudioapi 0.15.0 2023-07-07 [2] CRAN (R 4.3.0)
## S4Vectors 0.40.2 2023-11-23 [2] Bioconductor
## sass 0.4.8 2023-12-06 [2] CRAN (R 4.3.0)
## scales 1.3.0 2023-11-28 [2] CRAN (R 4.3.0)
## sessioninfo 1.2.2 2021-12-06 [2] CRAN (R 4.3.0)
## shiny 1.8.0 2023-11-17 [2] CRAN (R 4.3.0)
## stringi 1.8.3 2023-12-11 [2] CRAN (R 4.3.0)
## stringr * 1.5.1 2023-11-14 [2] CRAN (R 4.3.0)
## survival 3.5-7 2023-08-14 [2] CRAN (R 4.3.0)
## tibble * 3.2.1 2023-03-20 [2] CRAN (R 4.3.0)
## tidyr * 1.3.1 2024-01-24 [1] CRAN (R 4.3.2)
## tidyselect 1.2.0 2022-10-10 [2] CRAN (R 4.3.0)
## tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.3.0)
## timechange 0.3.0 2024-01-18 [2] CRAN (R 4.3.0)
## tzdb 0.4.0 2023-05-12 [2] CRAN (R 4.3.0)
## urlchecker 1.0.1 2021-11-30 [2] CRAN (R 4.3.0)
## usethis * 2.2.2 2023-07-06 [2] CRAN (R 4.3.0)
## utf8 1.2.4 2023-10-22 [2] CRAN (R 4.3.0)
## vctrs 0.6.5 2023-12-01 [2] CRAN (R 4.3.0)
## vegan * 2.6-4 2022-10-11 [1] CRAN (R 4.3.0)
## withr 3.0.0 2024-01-16 [2] CRAN (R 4.3.0)
## xfun 0.42 2024-02-08 [2] CRAN (R 4.3.2)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 4.3.0)
## XVector 0.42.0 2023-10-24 [2] Bioconductor
## yaml 2.3.8 2023-12-11 [2] CRAN (R 4.3.0)
## zlibbioc 1.48.0 2023-10-24 [2] Bioconductor
##
## [1] /Users/cab565/Library/R/x86_64/4.3/library
## [2] /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library
##
## ──────────────────────────────────────────────────────────────────────────────